#!/usr/bin/env python
"""
gpcheckperf -- Check the hardware for Greenplum Database

Usage: gpcheckperf <options>

    --version  : print version information
    -d segdir  : where segdir is a directory on which to test I/O performance.
                 Multiple -d flags may be specified for multiple directories
                 on each host. These will typically be your Greenplum primary
                 (and/or mirror) data directories.
                 e.g. -d /dbfast1 -d /dbfast2
    -?         : print this help text
    -v         : enable verbose
    -V         : enable very verbose
    -D         : print per host disk stats on disk read/write tests
    -r flags   : where flags specifies the tests to run [default=dsN].
                 Supported test flags are:
                       d: runs only disk tests
                       s: runs only stream benchmark
                       n: runs only netperf tests (serial)
                       N: runs only netperf tests (parallel)
                       M: runs only netperf tests (full matrix)
    -B size    : where size specifies the block size for disk performance tests
                 [default=32KB] e.g. 1KB, 4MB
    -S size    : where size specifies the file size for disk performance tests
                 [default=2X Memory] e.g. 500MB, 16GB
    -h host    : the host to connect to (multiple -h is okay)
    -f file    : a file listing all hosts to connect to
    --duration : how long to run network test (default 15 seconds)
    --netperf  : use netperf instead of gpnetbenchServer/gpnetbenchClient
    --buffer-size : the size of the send buffer in kilobytes (default 32 kilobytes)
"""

import datetime
import os
import sys

sys.path.append(sys.path[0] + '/lib')
try:
    import getopt, math, StringIO, stat, subprocess, signal
    from gppylib.gpparseopts import OptParser
    from gppylib.util import ssh_utils
    from gppylib.commands import unix
    from gppylib.commands.base import *
except ImportError, e:
    sys.exit('Error: unable to import module: ' + str(e))

# Greenplum installation root; gpssh and gpscp are launched from $GPHOME/bin.
GPHOME = os.getenv('GPHOME')
if GPHOME is None:
    sys.exit('Please set GPHOME environment variable')

# USER must be set to something non-blank.  The value is compared by content:
# the earlier identity test (`USER is ' '`) depended on string interning and
# let an empty or whitespace-only value through.
USER = os.getenv('USER')
if USER is None or not USER.strip():
    sys.exit('Please set USER environment variable')

# True: copyExecOver() ships binaries with gpscp; False: it uses gpsync (rsync).
# main() overwrites this with the result of unix.isScpEnabled().
_SCP_ENABLED = True


def usage(exitarg):
    parser = OptParser()
    try:
        parser.print_help()
    except:
        print __doc__
    sys.exit(exitarg)


class Global():
    """Holder for the script name and the option table shared by all functions."""

    # File name of this script (printed by --version).
    script_name = os.path.basename(__file__)

    # Option defaults; parseCommandLine() overwrites them from the command line.
    opt = {
        '-d': [],                              # directories for the disk tests
        '-D': False,                           # print per-host disk results
        '-v': False,                           # verbose
        '-V': False,                           # very verbose
        '-r': '',                              # test flags; '' is replaced by 'dsN'
        '-B': 32 * 1024,                       # disk test block size
        '-S': 0,                               # disk test file size; 0 = derive from memory
        '-h': [],                              # hosts given with -h
        '-f': None,                            # host file given with -f
        '--duration': 15,                      # network test duration
        '--net': None,                         # 'netperf', 'parallel', 'matrix' or None
        '--netserver': 'gpnetbenchServer',     # server binary for the network test
        '--netclient': 'gpnetbenchClient',     # client binary for the network test
        '--buffer-size': 0,                    # 0 = not specified
    }


# The single instance every function reads its options from.
GV = Global()


def killall(procname):
    """Return a shell snippet that kills processes named *procname*.

    `pkill` is tried with the first 15 characters of the name (presumably
    because of the kernel's process-name length limit -- confirm); if it
    fails, `killall -9` is run with the full name.  All output is discarded
    and the trailing `|| true` makes the snippet succeed either way.
    """
    short_name = procname[:15]
    return '(pkill %s || killall -9 %s) > /dev/null 2>&1 || true' % (short_name, procname)


def strcmd(cmd):
    """Render an argv-style list as one printable command line.

    Arguments are joined with single spaces; any argument that contains a
    space is wrapped in single quotes.  The result is meant for log output
    only (embedded quotes are not escaped).

    Unlike the earlier reduce()-based version, an argument that *starts*
    with a space is quoted too, and an empty list yields '' instead of
    raising TypeError.
    """
    return ' '.join("'" + arg + "'" if ' ' in arg else arg for arg in cmd)


def gpssh(cmd, verbose):
    c = ['%s/bin/gpssh' % GPHOME]
    if verbose:
        c.append('-v')
    if GV.opt['-f']:
        c.append('-f')
        c.append(GV.opt['-f'])
    else:
        for h in GV.opt['-h']:
            c.append('-h')
            c.append(h)
    c.append(cmd)

    if GV.opt['-v']:
        print '[Info]', strcmd(c)
    p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out = p.stdout.read(-1)
    rc = p.wait()
    return not rc, out


def gpscp(src, dst):
    c = ['%s/bin/gpscp' % GPHOME]
    if GV.opt['-V']:
        c.append('-v')
    if GV.opt['-f']:
        c.append('-f')
        c.append(GV.opt['-f'])
    else:
        for h in GV.opt['-h']:
            c.append('-h')
            c.append(h)
    c.append(src)
    c.append(dst)
    if GV.opt['-v']:
        print '[Info]', strcmd(c)
    p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out = p.stdout.read(-1)
    rc = p.wait()
    if rc:
        sys.exit('[Error] command failed: gpscp {0} =:{1} with output: {2}'.format(src, dst, out))


def run_cmd(cmd, peer):
    """
    Start *cmd* through the shell and return the Popen object without waiting.

    stdout and stderr are captured on separate pipes.  The command string and
    *peer* are attached to the returned object as x_cmd and x_peer so the
    caller can report them later.
    """
    child = subprocess.Popen(cmd, shell=True,
                             stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    child.x_peer = peer
    child.x_cmd = cmd
    return child


def gpsync(src, dst):
    """
    Copy *src* to *dst* on every host in GV.opt['-h'] using rsync over ssh.

    One rsync is started per host (all are launched before any is waited on),
    then each is reaped in turn.  Exits through sys.exit() on the first rsync
    that returns a non-zero status.

    rsync options:
    -a: archive mode, copy all attributes of the original files
    -c : copy files based on checksum, it will override default behaviour of rsync
         to skip transferring files with same modification time and size
    -e: to specify ssh options
    -P : print information showing the progress of the transfer
    """
    procs = []
    for peer in GV.opt['-h']:
        cmd = 'rsync -P -a -c -e "ssh -o BatchMode=yes -o StrictHostKeyChecking=no" {0} {1}:{2}' \
            .format(src, unix.canonicalize(peer), dst)
        if GV.opt['-v']:
            print('[Info] {0}' .format(cmd))
        procs.append(run_cmd(cmd, peer))

    for p in procs:
        # communicate() drains stdout and stderr together.  Reading stdout to
        # EOF first (as before) could hang forever once rsync had filled the
        # unread stderr pipe.
        out, err = p.communicate()
        # StringIO iteration splits on '\n' only, exactly like readlines().
        for line in StringIO.StringIO(out):
            print('[Out {0}] {1}' .format(p.x_peer, line))
        if p.returncode:
            sys.exit('[Error] command failed for host:{0} cmd:{1} with status:{2} error: "{3}"'
                     .format(p.x_peer, p.x_cmd, p.returncode, err))


def getPlatform():
    """Return 'linux' or 'darwin' if sys.platform contains that word, else '?'."""
    for known in ('linux', 'darwin'):
        if known in sys.platform:
            return known
    return '?'


def getMemory():
    """Return the local machine's memory size in bytes, or None if unknown.

    linux : reads the MemTotal line of /proc/meminfo; only a value expressed
            in 'kB' is accepted.
    darwin: reads `sysctl hw.physmem`.
    None is returned for other platforms, when the command fails, or when
    the reported value is 0.
    """
    platform = getPlatform()

    if platform == 'linux':
        status, text = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
        if status != 0:
            return None
        fields = text.strip().split(' ')
        # second-to-last field is the amount, last field is the unit
        amount = int(fields[len(fields) - 2])
        unit = fields[len(fields) - 1]
        if unit != 'kB':
            return None
        if amount:
            return amount * 1024
        return None

    if platform == 'darwin':
        status, text = run("/usr/sbin/sysctl hw.physmem")
        if status != 0:
            return None
        amount = int(text.strip().split(' ')[1])
        if amount:
            return amount
        return None

    return None


def parseMemorySize(line):
    """Convert a size such as '500', '32KB', '4M' or '16gb' to bytes.

    Matching is case-insensitive.  An optional trailing 'B' is dropped, then
    an optional K/M/G suffix scales the number by 1024, 1024**2 or 1024**3.

    On any parse error the message is printed and the process exits with
    status -1.  (The message is now built with str(err); concatenating the
    exception object itself raised TypeError and hid the real error.)
    """
    scales = {'K': 1024, 'M': 1024 * 1024, 'G': 1024 * 1024 * 1024}
    try:
        text = line.strip().upper()
        if text.endswith('B'):
            text = text[:-1]
        factor = scales.get(text[-1:], 1)
        if factor > 1:
            text = text[:-1]
        return int(text) * factor
    except Exception as err:
        print('Error: ' + str(err))
        sys.exit(-1)


def parseDuration(val):
    """Convert a duration such as '15', '30s', '2m', '1h' or '1d' to seconds.

    Matching is case-insensitive.  A trailing 'S' is dropped first; a
    trailing M, H or D then multiplies the number by 60, 3600 or 86400.
    On any parse error the exception is printed and the process exits with
    status -1.
    """
    multipliers = {'M': 60, 'H': 3600, 'D': 86400}
    try:
        text = val.strip().upper()
        if text.endswith('S'):
            text = text[:-1]
        scale = multipliers.get(text[-1:], 1)
        if scale > 1:
            text = text[:-1]
        return int(text) * scale
    except Exception as err:
        print("exception: %s" % err)
        sys.exit(-1)


def print_version():
    print '%s version $Revision$' % GV.script_name
    sys.exit(0)


def parseCommandLine():
    """Parse sys.argv into GV.opt, validate the result and apply defaults.

    Exits through usage() on malformed or inconsistent options (including a
    non-integer --buffer-size, which used to surface as a raw traceback).

    On return:
      * GV.opt['-r'] holds the test flags ('dsN' when -r was not given) and
        GV.opt['--net'] the selected network test ('netperf', 'parallel' or
        'matrix'), or None when no network test was requested;
      * GV.opt['-S'] is the file size per -d directory: the -S value, or
        twice the detected system memory, divided by the number of
        directories;
      * each -d directory has one trailing '/' removed, if present.
    The invoking command line is echoed last.
    """
    try:
        # 'p:' is accepted by getopt, but no branch below consumes its value.
        (options, args) = getopt.getopt(sys.argv[1:], '?vVDd:r:B:S:p:h:f:',
                                        ['duration=', 'version', 'netperf', 'buffer-size='])
    except Exception as e:
        # usage() does not return: it ends in sys.exit().
        usage('Error: ' + str(e))

    for (switch, val) in options:
        if switch == '-?':
            usage(0)
        elif switch[1] in 'vVD':
            GV.opt[switch] = True
        elif switch[1] in 'dh':
            GV.opt[switch].append(val)
        elif switch[1] in 'rf':
            GV.opt[switch] = val
        elif switch[1] in 'BS':
            GV.opt[switch] = parseMemorySize(val)
        elif switch == '--duration':
            GV.opt[switch] = parseDuration(val)
        elif switch == '--version':
            print_version()
        elif switch == '--netperf':
            GV.opt['--netserver'] = 'netserver'
            GV.opt['--netclient'] = 'netperf'
        elif switch == '--buffer-size':
            try:
                GV.opt[switch] = int(val)
            except ValueError:
                usage('Error: invalid --buffer-size value: %s' % val)

    # run default tests (if not specified)
    if GV.opt['-r'] == '':
        GV.opt['-r'] = 'dsN'

    # parse netperf test type, error if more than one specified
    netcount = 0
    for (flag, kind) in (('n', 'netperf'), ('N', 'parallel'), ('M', 'matrix')):
        if flag in GV.opt['-r']:
            GV.opt['--net'] = kind
            netcount += 1
    if netcount > 1:
        usage('Error: please only specify one network test at a time (-r [n|M|N])')

    # very verbose implies verbose
    if GV.opt['-V']:
        GV.opt['-v'] = True

    if len(GV.opt['-d']) == 0:
        usage('Error: please specify at least one segdir (-d switch)')

    if GV.opt['-B'] < 0 or GV.opt['-S'] < 0:
        usage('Error: please enter valid memory sizes for -B or -S switches')

    # exactly one of -h / -f must have been used
    host_sources = bool(GV.opt['-h']) + bool(GV.opt['-f'])
    if host_sources != 1:
        usage('Error: please specify at least one of -h or -f args, but not both')

    if GV.opt['-B'] > 1024 * 1024:
        usage('Error: maximum size for -B parameter is 1MB')

    # The total file size is split evenly across the -d directories.
    if GV.opt['-S'] == 0:
        system_mem_size = getMemory()
        if system_mem_size is not None:
            GV.opt['-S'] = 2 * system_mem_size // len(GV.opt['-d'])
        else:
            sys.exit('[Error] could not get system memory size. Instead, you can use the -S option to provide the file size value')
    else:
        GV.opt['-S'] //= len(GV.opt['-d'])

    if GV.opt['--duration'] <= 0:
        GV.opt['--duration'] = 15
        print('[INFO] Invalid network duration specified.  Using default (15 seconds)')

    if GV.opt['--netclient'].find('netperf') >= 0:
        if GV.opt['--buffer-size']:
            print('[Warning] --buffer-size option will be ignored when the --netperf option is enabled')
    else:
        if GV.opt['--buffer-size'] <= 0:
            print('[INFO] --buffer-size value is not specified or invalid. Using default (32 kilobytes)')
            GV.opt['--buffer-size'] = 32

    # strip the last '/' from the dir
    dd = []
    for d in GV.opt['-d']:
        if d and d[-1] == '/':
            d = d[:-1]
        dd.append(d)
    GV.opt['-d'] = dd

    # echo the command line (module-level strcmd; the nested duplicate is gone)
    print(strcmd(sys.argv))


def run(cmd):
    try:
        if GV.opt['-v']:
            print '[Info]', cmd
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
        out = p.stdout.read(-1)
        ok = p.wait()
    except KeyboardInterrupt:
        raise

    if GV.opt['-v'] and out and out != '':
        print out

    if out and out.find('Permission denied') >= 0 and out.find('publickey') >= 0:
        if not GV.opt['-v']:
            print out
        print 'Please use gpssh-exkeys to exchange keys ...'
        sys.exit(1)

    return ok, out


runTeardown_torndown = False


def runTeardown():
    global runTeardown_torndown
    if runTeardown_torndown:
        return
    if GV.opt['-v']:
        print '--------------------'
        print '  TEARDOWN'
        print '--------------------'
    dirs = ''
    for d in GV.opt['-d']:
        dirs = '%s %s/gpcheckperf_$USER' % (dirs, d)
    try:
        gpssh('rm -rf ' + dirs, GV.opt['-V'])
    except:
        pass

    try:
        if GV.opt['--net']:
            gpssh(killall(GV.opt['--netserver']), GV.opt['-V'])
    except:
        pass

    runTeardown_torndown = True


def runSetup():
    if GV.opt['-v']:
        print '--------------------'
        print '  SETUP %s' % datetime.datetime.now().isoformat()
        print '--------------------'
    okCount = 0
    try:
        # check python reachable
        if GV.opt['-v']:
            print '[Info] verify python interpreter exists'
        (ok, out) = gpssh('python -c print', GV.opt['-V'])
        if not ok:
            if not GV.opt['-v']:
                print out
            sys.exit("[Error] unable to find python interpreter on some hosts\n"
                     + "        verify PATH variables on the hosts")

            # mkdir cperf
        if GV.opt['-v']:
            print '[Info] making gpcheckperf directory on all hosts ... '
        dirs = ''
        for d in GV.opt['-d']:
            dirs = '%s %s/gpcheckperf_$USER' % (dirs, d)

        cmd = 'rm -rf %s ; mkdir -p %s' % (dirs, dirs)
        (ok, out) = gpssh(cmd, GV.opt['-V'])
        if not ok:
            print 'failed gpssh: %s' % out
            sys.exit("[Error] unable to make gpcheckperf directory. \n"
                     + "        command failed: " + cmd)

    except:
        runTeardown()
        raise


def copyExecOver(fname):
    # copy to this dir
    d = GV.opt['-d'][0]
    target = '%s/gpcheckperf_$USER/%s' % (d, fname)
    path = '%s/lib/%s' % (os.path.dirname(__file__), fname)

    if not os.path.isfile(path):
        print "binary file not found: %s" % path
        sys.exit(1)

    if GV.opt['-v']:
        print '[Info] copy local %s to remote %s' % (path, target)

    try:
        pathStat = os.stat(path)
    except:
        sys.exit('[Error] cannot stat file ' + path)
    if not stat.S_ISREG(pathStat.st_mode):
        sys.exit('[Error] invalid file ' + path)
    if not os.access(path, os.X_OK):
        sys.exit('[Exit] file not executable: ' + path)

    if _SCP_ENABLED:
        gpscp(path, '=:{0}' .format(target))
    else:
        gpsync(path, '{0}'.format(target))

    # chmod +x file
    (ok, out) = gpssh('chmod a+rx %s' % target, GV.opt['-V'])
    if not ok:
        sys.exit('[Error] command failed: chmod a+rx %s with output: %s' % (target, out))

    return target


def parseMultiDDResult(out):
    """Parse the gpssh output of `time -p multidd ...` into per-host results.

    Only lines of the form '[host] text' are considered.  For every host,
    the 'multidd total bytes N' line supplies the byte count and the
    'real T' line (from `time -p`) the elapsed seconds.

    Returns a dict mapping host -> (MB/s, seconds, bytes).  Elapsed times
    below 0.1s are raised to 0.1s.

    Byte counts are remembered per host, so a host's 'real' line is never
    paired with another host's byte count when output lines interleave; a
    host without a byte-count line is reported with 0 bytes.
    """
    result = {}
    bytes_by_host = {}
    for line in out.splitlines():
        close = line.find(']')
        if close == -1:
            continue
        host = line[1:close]
        payload = line[close + 2:]    # skip the '] ' separator

        if payload.startswith('multidd total bytes '):
            bytes_by_host[host] = int(payload.split()[-1])
        elif payload.startswith('real'):
            elapsed = float(payload.split()[1])
            if elapsed < 0.1:
                elapsed = 0.1  # avoid division by zero
            nbytes = bytes_by_host.get(host, 0)
            mbps = nbytes / elapsed / 1024 / 1024
            result[host] = (mbps, elapsed, nbytes)

    return result


def runDiskWriteTest(multidd):
    print
    print '--------------------'
    print '--  DISK WRITE TEST'
    print '--------------------'

    cmd = 'time -p ' + multidd
    for d in GV.opt['-d']:
        cmd = cmd + (' -i /dev/zero -o %s/gpcheckperf_$USER/ddfile' % d)
    if GV.opt['-B']:
        cmd = cmd + (' -B %d' % GV.opt['-B'])
    if GV.opt['-S']:
        cmd = cmd + (' -S %d' % GV.opt['-S'])
    (ok, out) = gpssh(cmd, GV.opt['-V'])
    if not ok:
        sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
    return parseMultiDDResult(out)


def runDiskReadTest(multidd):
    print
    print '--------------------'
    print '--  DISK READ TEST'
    print '--------------------'

    cmd = 'time -p ' + multidd
    for d in GV.opt['-d']:
        cmd = cmd + (' -o /dev/null -i %s/gpcheckperf_$USER/ddfile' % d)
    if GV.opt['-B']:
        cmd = cmd + (' -B %d' % GV.opt['-B'])
    if GV.opt['-S']:
        cmd = cmd + (' -S %d' % GV.opt['-S'])
    (ok, out) = gpssh(cmd, GV.opt['-V'])
    if not ok:
        sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
    return parseMultiDDResult(out)


def runStreamTest():
    print
    print '--------------------'
    print '--  STREAM TEST'
    print '--------------------'

    cmd = copyExecOver('stream')
    (ok, out) = gpssh(cmd, GV.opt['-V'])
    if not ok:
        sys.exit('[Error] command failed: %s with output: %s' % (cmd, out))
    out = StringIO.StringIO(out)
    result = {}
    for line in out:
        i = line.find(']')
        o = line[i + 2:]
        if o.startswith('Copy:'):
            h = line[1:i]
            o = o.split()
            result[h] = (float(o[1]), 0, 0)
    return result


def startNetServer():
    port = 23000
    rmtPath = copyExecOver(GV.opt['--netserver'])

    for i in xrange(5):
        if i > 0:
            print '[Warning] retrying with port %d' % port
        (ok, out) = gpssh(killall(GV.opt['--netserver']), GV.opt['-V'])

        (ok, out) = gpssh('%s -p %d > /dev/null 2>&1' % (rmtPath, port), GV.opt['-V'])
        if ok:
            return port

        if GV.opt['-v']:
            print '[Warning] start netserver with port %d failed' % port
        port += 12

    return 0


def killProc(proc):
    """Send SIGKILL to *proc* and return its exit status.

    A falsy *proc* is ignored and 0 is returned.  An OSError from os.kill()
    is ignored; the process is waited on either way.
    """
    if not proc:
        return 0
    try:
        os.kill(proc.pid, signal.SIGKILL)
    except OSError:
        pass
    return proc.wait()


def spawnNetperfTestBetween(x, y, netperf_path, netserver_port, sec=5):
    if GV.opt['--netclient'] == 'netperf' and GV.opt['--netserver'] == 'netserver':
        cmd = ('%s -H %s -p %d -t TCP_STREAM -l %s -f M -P 0 '
               % (netperf_path, y, netserver_port, sec))
    else:
        cmd = ('%s -H %s -p %d -l %s -P 0 -b %s'
               % (netperf_path, y, netserver_port, sec, GV.opt['--buffer-size']))

    c = ['ssh', '-o', 'BatchMode yes',
         '-o', 'StrictHostKeyChecking no',
         x, cmd]
    proc = None
    try:
        if GV.opt['-v']:
            print '[Info]', strcmd(c)
        proc = subprocess.Popen(c, stdout=subprocess.PIPE)
    except KeyboardInterrupt:
        killProc(proc)
        raise
    if not proc:
        print '[Warning] netperf failed on %s -> %s' % (x, y)
        return None, None, None

    return proc, x, y


def reapNetperfTest(proc, x, y):
    ok = True
    out = None
    if proc:
        try:
            rc = proc.wait()
            out = proc.stdout.read(-1)
            ok = not killProc(proc)
            proc = None
        except KeyboardInterrupt, ki:
            raise
        except:
            pass
        finally:
            proc = None

    if GV.opt['-v'] and out and out != '':
        print '[Info]', out
    if out and out.find('Permission denied') >= 0 and out.find('publickey') >= 0:
        if not GV.opt['-v']:
            print out
        print '[Error] Please use gpssh-exkeys to exchange keys ...'
        sys.exit(1)

    if not ok:
        print '[Warning] netperf failed on %s -> %s' % (x, y)
        return []

    for line in StringIO.StringIO(out):
        line = line.split()
        if len(line) != 5:
            continue
        l = [x, y]
        l.extend(map(lambda x: float(x), line))
        if GV.opt['-v']:
            print '[Info] %s -> %s : %s' % (x, y, str(line))
        return l

    print '[Warning] netperf failed on %s -> %s (invalid result)' % (x, y)
    print '[Info]', out
    return []


def runNetperfTestBetween(x, y, netperf_path, netserver_port):
    """Run one network test from *x* to *y* for --duration seconds.

    Returns the list produced by reapNetperfTest(), or [] if the client
    could not be started.
    """
    spawned = spawnNetperfTestBetween(x, y, netperf_path, netserver_port, GV.opt['--duration'])
    (proc, src, dst) = spawned
    if proc:
        return reapNetperfTest(proc, src, dst)
    return []


def setupNetPerfTest():
    print
    print '-------------------'
    print '--  NETPERF TEST'
    print '-------------------'

    hostlist = ssh_utils.HostList()
    for h in GV.opt['-h']:
        hostlist.add(h)
    if GV.opt['-f']:
        hostlist.parseFile(GV.opt['-f'])

    h = hostlist.get()
    if len(h) == 0:
        usage('Error: missing hosts in -h and/or -f arguments')

    if len(h) == 1:
        print '[Warning] single host only - abandon netperf test'
        return (None, None, None)

    # start netserver
    netserver_port = startNetServer()
    if netserver_port == 0:
        print '[Error] unable to start netserver ... abort netperf test'
        return (None, None, None)

    netperf = copyExecOver(GV.opt['--netclient'])
    return (netperf, h, netserver_port)


def runNetPerfTest():
    """Run the serial network test and return the list of results.

    Hosts are paired as (host[0], host[1]), (host[2], host[3]), ...; with an
    odd number of hosts the last one is paired with host[0].  Every pair is
    tested in both directions, one test at a time.  Returns None when the
    setup did not produce a client.
    """
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return None

    # set the last host aside when the count is odd
    hosts = hostlist[:]
    leftover = None
    if len(hosts) % 2 == 1:
        leftover = hosts.pop()

    pairs = [(hosts[i - 1], hosts[i]) for i in xrange(1, len(hosts), 2)]
    if leftover:
        pairs.append((leftover, hosts[0]))

    result = []
    for (a, b) in pairs:
        # forward direction first, then the reverse
        for (src, dst) in ((a, b), (b, a)):
            res = runNetperfTestBetween(src, dst, netperf, netserver_port)
            if res and len(res) >= 6:
                result.append(res)

    return result


def runNetPerfTestParallel():
    """Run the parallel network test and return the list of results.

    Hosts are paired as (host[0], host[1]), (host[2], host[3]), ...; with an
    odd number of hosts, host[0] is appended so the last host has a partner.
    Two rounds are run: all pairs at once in the forward direction, then all
    pairs at once in the reverse direction.  Returns None when the setup did
    not produce a client.
    """
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return None

    hosts = hostlist[:]
    if len(hosts) % 2 == 1:
        hosts.append(hosts[0])
    pairs = [(hosts[i - 1], hosts[i]) for i in xrange(1, len(hosts), 2)]

    result = []
    for forward in (True, False):
        # launch every pair of this round before collecting any result
        running = []
        for (a, b) in pairs:
            if forward:
                (src, dst) = (a, b)
            else:
                (src, dst) = (b, a)
            spawned = spawnNetperfTestBetween(src, dst, netperf, netserver_port, GV.opt['--duration'])
            if spawned[0]:
                running.append(spawned)

        for (proc, x, y) in running:
            res = reapNetperfTest(proc, x, y)
            if res and len(res) >= 6:
                result.append(res)

    return result


def get_host_map(hostlist):
    '''
        Returns a seglist dictionary that maps segment name to hostname and
        a uniqhosts dictionary that maps each hostname to one segment name.

        For example:
        seglist =
            seglist['sdw1-1'] = apollo1
            seglist['sdw1-2'] = apollo1
            seglist['sdw2-1'] = apollo2
            seglist['sdw2-2'] = apollo2
        uniqhosts =
            uniqhosts['apollo1'] = 'sdw1-1'
            uniqhosts['apollo2'] = 'sdw2-1'
    '''
    seglist = dict()  # segment list
    uniqhosts = dict()  # unique host list

    '''
     Get hostnames using non-verbose mode since verbose output makes parsing difficult with extra lines as show:
        $gpssh -v -h sdw2 hostname
        [WARN] Reference default values as $MASTER_DATA_DIRECTORY/gpssh.conf could not be found
        Using delaybeforesend 0.05 and prompt_validation_timeout 1.0
        [Reset ...]
        [INFO] login sdw2
        [sdw2] sdw2
        [INFO] completed successfully
        [Cleanup...]
    '''
    rc, out = gpssh('hostname', False)
    if not rc:
        raise Exception('Encountered error running hostname')

    # get unique hostname list
    for line in out.splitlines():
        seg, host = line.translate(None, '[]').split()
        uniqhosts[host] = seg

    # get list of segments associated with each host (can't use gpssh since it de-dupes hosts)
    for host in hostlist:
        cmd = ['ssh', '-o', 'BatchMode yes', '-o', 'StrictHostKeyChecking no', host, 'hostname']

        proc = None
        try:
            if GV.opt['-v']: print '[Info]', strcmd(cmd)
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out = proc.stdout.read(-1)
            rc = proc.wait()
            if rc:
                raise Exception('ssh error with the following command:\n%s with output: %s' % (cmd, out))
            seglist[host] = out.strip()
        except KeyboardInterrupt:
            killProc(proc)
            raise
        except Exception, err:
            print err
            sys.exit(-1)

    return seglist, uniqhosts


def runNetPerfTestMatrix():
    '''
        Runs the full matrix network test: every unique host connects to
            every segment name that lives on a different host.
        All clients are started first and collected afterwards.
        Returns the performance results and the dictionary mapping
            segment name to hostname, or (None, None) when the setup did
            not produce a client.
    '''
    (netperf, hostlist, netserver_port) = setupNetPerfTest()
    if not netperf:
        return None, None

    # dict() of seglist[segname] = hostname, uniqhosts[hostname] = 1 segment name
    seglist, uniqhosts = get_host_map(hostlist)

    # spawn netperf between all hosts, skipping segments on the source host
    running = []
    for host in uniqhosts:
        for segment in seglist:
            if seglist[segment] != host:
                spawned = spawnNetperfTestBetween(host, segment, netperf, netserver_port, GV.opt['--duration'])
                if spawned[0]:
                    running.append(spawned)

    result = []
    for (proc, srcHost, dstHost) in running:
        res = reapNetperfTest(proc, srcHost, dstHost)
        if res and len(res) >= 6:
            result.append(res)

    return result, seglist


def printMatrixResult(result, seglist):
    """Print the report of the full matrix network test.

    result  : list of rows [source host, destination segment, ..., rate],
              the rate being element 6 of each row.
    seglist : dict mapping a segment name to its hostname; used to credit
              received traffic to the right host.

    Prints per-host transmit and receive totals, the per-link rates in
    verbose mode, and a sum/min/max/avg/median summary.  With an empty
    result only the title is printed.

    The statistics use the built-in sum/min/max: the earlier
    `x > y and y or x` fold skipped a rate of 0.0 when computing the minimum
    and capped the minimum at 9999999999.
    """
    print('Full matrix netperf bandwidth test')
    if not result:
        # the statistics below need at least one measurement
        return

    # sum up Rx/Tx rate for each host
    netTx = dict()
    netRx = dict()
    for row in result:
        rate = float(row[6])
        netTx[row[0]] = netTx.get(row[0], 0.0) + rate
        # netRx requires that we lookup the hostname for a given segment name
        rxHost = seglist[row[1]]
        netRx[rxHost] = netRx.get(rxHost, 0.0) + rate

    # print Tx rates
    print("\nPer host transfer rates")
    for host in netTx:
        print('%s Tx rate: %.2f' % (host, netTx[host]))
    print('')

    # print Rx rates
    print("Per host receive rates")
    for host in netRx:
        print('%s Rx rate: %.2f' % (host, netRx[host]))
    print('')

    # print per link results in verbose mode
    if GV.opt['-v']:
        for row in result:
            print('%s -> %s = %f' % (row[0], row[1], row[6]))

    rates = [float(row[6]) for row in result]
    total = sum(rates)
    lowest = min(rates)
    highest = max(rates)
    average = float(total) / len(rates)
    # upper middle element for an even number of rates (unchanged behaviour)
    median = sorted(rates)[len(rates) // 2]

    print('')
    print('Summary:')
    print('sum = %.2f MB/sec' % total)
    print('min = %.2f MB/sec' % lowest)
    print('max = %.2f MB/sec' % highest)
    print('avg = %.2f MB/sec' % average)
    print('median = %.2f MB/sec' % median)
    print('')


def printNetResult(result):
    """Print the report of the serial or parallel network test.

    result : list of rows [source host, destination host, ..., rate], the
             rate being element 6 of each row.

    Prints every link's rate, a sum/min/max/avg/median summary, and a
    warning for each link slower than 90% of the fastest one.  With an empty
    result only the title is printed.

    The statistics use the built-in sum/min/max: the earlier
    `x > y and y or x` fold skipped a rate of 0.0 when computing the minimum
    and capped the minimum at 9999999999.
    """
    print('Netperf bisection bandwidth test')
    if not result:
        # the statistics below need at least one measurement
        return

    for row in result:
        print('%s -> %s = %f' % (row[0], row[1], row[6]))

    rates = [float(row[6]) for row in result]
    total = sum(rates)
    lowest = min(rates)
    highest = max(rates)
    average = float(total) / len(rates)
    # upper middle element for an even number of rates (unchanged behaviour)
    median = sorted(rates)[len(rates) // 2]

    print('')
    print('Summary:')
    print('sum = %.2f MB/sec' % total)
    print('min = %.2f MB/sec' % lowest)
    print('max = %.2f MB/sec' % highest)
    print('avg = %.2f MB/sec' % average)
    print('median = %.2f MB/sec' % median)
    print('')

    limit = highest * 0.9
    for row in result:
        if row[6] < limit:
            print('[Warning] connection between %s and %s is no good' % (row[0], row[1]))


def printResult(title, result):
    totTime = 0
    totBytes = 0
    totMBPS = 0
    min = 99999999
    max = 0
    minHost = maxHost = None
    for h in result:
        (mbps, time, bytes) = result[h]
        totTime += time
        totBytes += bytes
        totMBPS += mbps
        if min > mbps:
            min = mbps
            minHost = h
        if max < mbps:
            max = mbps
            maxHost = h

    avgTime = totTime / len(result)

    print
    if totTime > 0:
        print ' %s avg time (sec): %3.2f' % (title, avgTime)
    if totBytes > 0:
        print ' %s tot bytes: %d' % (title, totBytes)
    print ' %s tot bandwidth (MB/s): %3.2f' % (title, totMBPS)
    print ' %s min bandwidth (MB/s): %3.2f [%s]' % (title, min, minHost)
    print ' %s max bandwidth (MB/s): %3.2f [%s]' % (title, max, maxHost)
    if GV.opt['-D']:
        print ' -- per host bandwidth --'
        for h in result:
            (mbps, time, bytes) = result[h]
            print '    %s bandwidth (MB/s): %3.2f [%s]' % (title, mbps, h)
    print


def getHostList():
    """
    Collect the hosts passed with the -h and/or -f flags, check them with
    HostList.checkSSH(), and replace GV.opt['-h'] with the result of
    HostList.filterMultiHomedHosts().

    Exits the process if the ssh check raises SSHError, and through usage()
    if no host is left.
    :return: the list now stored in GV.opt['-h']
    """
    collector = ssh_utils.HostList()
    for name in GV.opt['-h']:
        collector.add(name)
    hostfile = GV.opt['-f']
    if hostfile:
        collector.parseFile(hostfile)

    try:
        collector.checkSSH()
    except ssh_utils.SSHError as e:
        sys.exit('[Error] {0}' .format(str(e)))

    filtered = collector.filterMultiHomedHosts()
    GV.opt['-h'] = filtered
    if len(filtered) == 0:
        usage('Error: missing hosts in -h and/or -f arguments')
    return filtered


def main():

    try:
        parseCommandLine()
        runSetup()
        diskWriteResult = diskReadResult = streamResult = netResult = None
        tornDown = False
        try:
            global _SCP_ENABLED
            # check if scp executable binary is present on the master and all specified host
            _SCP_ENABLED = unix.isScpEnabled(getHostList() + [unix.getLocalHostname()])

            if GV.opt['-r'].find('d') >= 0:
                print('[Warning] Using %d bytes for disk performance test. This might take some time' % (GV.opt['-S'] * len(GV.opt['-d'])))
                multidd = copyExecOver('multidd')
                diskWriteResult = runDiskWriteTest(multidd)
                diskReadResult = runDiskReadTest(multidd)

            if GV.opt['-r'].find('s') >= 0:
                streamResult = runStreamTest()

            if GV.opt['--net'] == 'netperf':
                netResult = runNetPerfTest()
            elif GV.opt['--net'] == 'parallel':
                netResult = runNetPerfTestParallel()
            elif GV.opt['--net'] == 'matrix':
                netResult, seglist = runNetPerfTestMatrix()

            runTeardown()

        finally:
            print
            print '===================='
            print '==  RESULT %s' % datetime.datetime.now().isoformat()
            print '===================='

            if diskWriteResult:
                printResult('disk write', diskWriteResult)

            if diskReadResult:
                printResult('disk read', diskReadResult)

            if streamResult:
                printResult('stream', streamResult)

            if netResult and GV.opt['--net'] == 'matrix':
                printMatrixResult(netResult, seglist)
            elif netResult and GV.opt['--net']:
                printNetResult(netResult)

            runTeardown()

    except KeyboardInterrupt:
        print('[Abort] Keyboard Interrupt ...')

if __name__ == '__main__':
     main()
